Race

# Address of the web page holding the table scraped below.
url = "https://www.health.ny.gov/statistics/vital_statistics/2019/table23.htm"

# Fetch the page, convert its HTML tables to data frames without promoting any
# row to a header, keep the first element of that result, and normalise the
# column names with janitor.
induced_abortion = 
  janitor::clean_names(
    first(
      html_table(read_html(url), header = FALSE)
    )
  )

Data cleaning: Non-Hispanic (NH)

# Build the Non-Hispanic table from the scraped data: pick the label column and
# five value columns by position (renaming them in the same step), keep the
# rows of interest, relabel counties with borough names, and convert every
# value column to numeric.
# NOTE(review): column positions 1/3/5/7/9/11 and rows 4, 6:11 are tied to the
# current layout of the source table -- presumably the state row followed by
# the New York City rows; confirm if the page layout changes.
clean_nh = 
  induced_abortion %>%
  select(
    borough = 1,
    total = 3,
    Non_Hispanic_Ratio = 5,
    NH_White_Only_Ratio = 7,
    NH_Black_Only_Ratio = 9,
    NH_Other_Ratio = 11
  ) %>%
  slice(4, 6:11) %>%
  mutate(
    borough = str_replace(borough, "Kings", "Brooklyn"),
    # Exact match only, so longer labels that merely contain "New York" are
    # left untouched.
    borough = if_else(borough == "New York", "Manhattan", borough),
    borough = str_replace(borough, "Richmond", "Staten Island"),
    # Strip every thousands separator (not just the first one) before the
    # numeric conversion, so values with more than one comma do not become NA.
    across(-borough, ~ as.numeric(str_remove_all(.x, ",")))
  )

# Persist the cleaned table for later use.
write_csv(clean_nh, file = "data/abortion_race_nh.csv")

Data cleaning: Hispanic (H)

# Build the Hispanic table from the scraped data: pick the label column and
# four value columns by position (renaming them in the same step), keep the
# rows of interest, relabel counties with borough names, and convert every
# value column to numeric.
# NOTE(review): column positions 1/3/13/15/17 and rows 4, 6:11 are tied to the
# current layout of the source table -- presumably the state row followed by
# the New York City rows; confirm if the page layout changes.
clean_h = 
  induced_abortion %>%
  select(
    borough = 1,
    total = 3,
    Hispanic_Ratio = 13,
    H_White_Only_Ratio = 15,
    H_Black_Only_Ratio = 17
  ) %>%
  slice(4, 6:11) %>%
  mutate(
    borough = str_replace(borough, "Kings", "Brooklyn"),
    # Exact match only, so longer labels that merely contain "New York" are
    # left untouched.
    borough = if_else(borough == "New York", "Manhattan", borough),
    borough = str_replace(borough, "Richmond", "Staten Island"),
    # Strip every thousands separator (not just the first one) before the
    # numeric conversion, so values with more than one comma do not become NA.
    across(-borough, ~ as.numeric(str_remove_all(.x, ",")))
  )

# Persist the cleaned table for later use.
write_csv(clean_h, file = "data/abortion_race_h.csv")

plotly: induced abortion ratio vs race (Non-Hispanic)

# Long-format view of the first row of clean_nh: `total` and the four
# Non-Hispanic columns are stacked into race / abortion pairs, with `borough`
# kept as the identifying column.
total_abortion_nhrace = 
  pivot_longer(
    slice_head(clean_nh, n = 1),
    cols = total:NH_Other_Ratio,
    names_to = "race",
    values_to = "abortion"
  )

# Interactive bar chart of the Non-Hispanic columns: drop `total`, stack the
# remaining value columns into race / abortion pairs, then draw one bar per
# race column, coloured by borough label.
abortion_race_nhplot = 
  clean_nh %>%
  select(-total) %>%
  pivot_longer(
    cols = -borough,
    names_to = "race",
    values_to = "abortion"
  ) %>%
  plot_ly(
    x = ~race,
    y = ~abortion,
    color = ~borough,
    type = "bar",
    colors = "viridis"
  ) %>%
  layout(
    title = "Abortion Ratios by Non-Hispanic Race for Boroughs and New York State",
    yaxis = list(title = "Number of Induced Abortions per 1,000 Live Births")
  )

# Display the chart.
abortion_race_nhplot

plotly: induced abortion ratio vs race (Hispanic)

# Long-format view of the first row of clean_h: the three Hispanic columns are
# stacked into race / abortion pairs, with `borough` and `total` kept as
# identifying columns.
# NOTE(review): total_abortion_nhrace also pivots `total`, this one does not --
# confirm which of the two shapes is intended.
total_abortion_hrace = 
  pivot_longer(
    slice_head(clean_h, n = 1),
    cols = Hispanic_Ratio:H_Black_Only_Ratio,
    names_to = "race",
    values_to = "abortion"
  )

# Interactive bar chart of the Hispanic columns: drop `total`, stack the
# remaining value columns into race / abortion pairs, then draw one bar per
# race column, coloured by borough label.
abortion_race_h = 
  clean_h %>%
  select(-total) %>%
  pivot_longer(
    cols = -borough,
    names_to = "race",
    values_to = "abortion"
  ) %>%
  plot_ly(
    x = ~race,
    y = ~abortion,
    color = ~borough,
    type = "bar",
    colors = "viridis"
  ) %>%
  layout(
    title = "Abortion Ratios by Hispanic Race for Boroughs and New York State",
    yaxis = list(title = "Number of Induced Abortions per 1,000 Live Births")
  )

# Display the chart.
abortion_race_h